library(tidyverse)
library(ggthemes)
library(lubridate)
library(viridis)
library(zoo)
library(arm)
library(lfe)
library(ggalt)



city.data <- read.csv('Data/city_data_minimal.csv')
ois.min <- read.csv('Data/OIS_Dataset_Minimal.csv')
load("Data/lemas/2020/38651-0001-Data.rda")
lemas.2020 <- da38651.0001
rm('da38651.0001')
ucr.offenses <- readRDS('Data/UCR/offenses_known_yearly_1960_2021.rds')
dem.voteshare <- read.csv('Data/other_data/demVoteShareAllYearsFIPS.csv')
police.employees <- read.csv('Data/other_data/ucrPoliceEmployeeData.csv')

city.data$first.year <- rep(NA,dim(city.data)[1])
city.data$last.year <- rep(NA,dim(city.data)[1])
for(i in 1:nrow(city.data)){
  if(grepl('-',city.data$Range.Provided.By.City[i])){
    city.data$first.year[i] <- unlist(strsplit(as.character(city.data$Range.Provided.By.City[i]),'-',fixed=TRUE))[1]
    city.data$last.year[i] <- unlist(strsplit(as.character(city.data$Range.Provided.By.City[i]),'-',fixed=TRUE))[2]
  }
}
city.data$totalYears <- ifelse(!is.na(city.data$first.year), as.numeric(city.data$last.year)-as.numeric(city.data$first.year)+1,0)

ois.min <- ois.min %>% filter(Hit !='Miss')

############################################
############################################
##
##    Counting shootings
##
############################################
############################################

ois.counts <- ois.min %>%
  group_by(LEAR_ID) %>%
  summarize(n.shootings=n())

ois.yearly.counts <- ois.min %>%
  group_by(LEAR_ID,Year) %>%
  summarize(n.shootings=n())


years <- seq(from=2000,to=2021)
lears <- unique(city.data$LEAR_ID)
department.yearly.data <- expand.grid(lear=lears, year=years, stringsAsFactors=FALSE) %>%
  left_join(ois.yearly.counts, by=c("lear"='LEAR_ID', "year"='Year')) %>% 
  mutate(n.shootings = ifelse(is.na(n.shootings), 0, n.shootings)) %>%
  left_join(city.data %>% dplyr::select(ORI9,ORI7,STATE,LEAR_ID,FIPS,first.year,last.year), by=c('lear'='LEAR_ID')) %>%
  left_join(ucr.offenses %>% dplyr::select(population,actual_index_violent,ori,year),
            by=c('ORI7'='ori', 'year'='year'))  %>%
  mutate(first.year.index = as.numeric(first.year)) %>%
  mutate(last.year.index = as.numeric(last.year)) %>%
  filter(!is.na(first.year.index) & !is.na(last.year.index) & !is.na(year)) %>%
  filter(year>=first.year.index) %>%
  filter(year<=last.year.index) %>%
  mutate(violPerCap = actual_index_violent/(population/100000),
         shotPerCap = n.shootings/(population/100000),
         pop100k = population/100000) %>%
  left_join(dem.voteshare %>% dplyr::select(county_fips,year,demshare),
            by=c('FIPS'='county_fips', 'year'='year')) %>%
  left_join(police.employees %>% dplyr::select(ORI7,year,total.employees,total.officers,total.civilians),
            by = c('ORI7' = 'ORI7', 'year'='year')) %>%
  mutate(copsPerCap = total.officers/(population/100000))
rm('years','lears')


bar <- department.yearly.data %>%
  group_by(ORI7) %>%
  summarise(tot = sum(n.shootings))


############################################
############################################
##
##    Figure 4.2
##
############################################
############################################


pdf('FinalFigures/fig4-1.pdf',7,5.5)
department.yearly.data %>%
  mutate(totalYears = ifelse(!is.na(first.year), as.numeric(last.year)-as.numeric(first.year)+1,0)) %>%
  group_by(lear) %>%
  summarize(n=sum(n.shootings)) %>%
  left_join(city.data %>% dplyr::select(LEAR_ID,totalYears,fullLoc), by=c('lear'='LEAR_ID')) %>%
  mutate(avgN = n/totalYears) %>%
  ggplot() +
  geom_histogram(aes(x=avgN)) +
  theme_tufte() +
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  labs(title = "Annual Police Shootings") +
  geom_label(aes(x = 22, y = 32, label = "Chicago"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 5) +
  geom_curve(aes(x = 26, y = 30, xend = 31, yend = 1), 
             colour = "#555555", 
             size=0.75, 
             curvature = -0.2,
             arrow = arrow(length = unit(0.03, "npc"))) +
  geom_label(aes(x = 19, y = 16, label = "Houston"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 5) +
  geom_curve(aes(x = 23, y = 14, xend = 29.3, yend = 1.5), 
             colour = "#555555", 
             size=0.75, 
             curvature = -0.3,
             arrow = arrow(length = unit(0.03, "npc"))) +
  labs(x='Average Annual Shootings',y='Count of Departments') +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0))
dev.off()


############################################
############################################
##
##    Figure 4.2
##
############################################
############################################

pdf('FinalFigures/fig4-2.pdf',7,5.5)
department.yearly.data %>%
  mutate(totalYears = ifelse(!is.na(first.year), 
                             as.numeric(last.year)-as.numeric(first.year)+1,
                             0)) %>%
  group_by(lear) %>%
  summarize(meanShotPerCap = mean(shotPerCap),
            totalYears = mean(totalYears)) %>%
  left_join(city.data %>% dplyr::select(LEAR_ID,totalYears,fullLoc), 
            by=c('lear'='LEAR_ID')) %>%
  ggplot() + 
  geom_histogram(aes(x=meanShotPerCap)) +
  #theme_igray() +
  theme_tufte() +
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  geom_label(aes(x = 2.7, y = 9, label = "Pueblo"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 5) +
  geom_curve(aes(x = 2.8, y = 8, xend = 3, yend = 1), 
             colour = "#555555", 
             size=0.75, 
             curvature = 0.2,
             arrow = arrow(length = unit(0.03, "npc"))) +
  geom_label(aes(x = 1.7, y = 20, label = "Chicago"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 5) +
  geom_curve(aes(x = 1.7, y = 20, xend = 1.13, yend = 11), 
             colour = "#555555", 
             size=0.75, 
             curvature = 0.2,
             arrow = arrow(length = unit(0.03, "npc"))) +
  xlim(-.5,4) +
  labs(title = "Rate of Police Shootings",
       x='Average Annual Shootings Per Capita',y='Count of Departments') +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0))
dev.off()



############################################
############################################
##
##    Figure 4.3
##
############################################
############################################
ds <- department.yearly.data%>%
  filter(year<2021) %>%
  mutate(shotCat = ifelse(n.shootings==0,'0',
                          ifelse(n.shootings==1,'1',
                                 ifelse(n.shootings==2,'2',
                                        ifelse(n.shootings==3,'3',
                                               ifelse(n.shootings==4,'4',
                                                      ifelse(n.shootings==5,'5',
                                                             'More than 5'))))))) %>%
  mutate(shotCat=factor(shotCat, levels=c('0','1','2','3',
                                          '4','5','More than 5')))

pdf('FinalFigures/fig4-3.pdf',7,5.5)
ggplot(ds, aes(x = year, fill = shotCat)) + 
  geom_bar(position = position_stack(reverse = TRUE)) +
  #bbc_style() +
  theme_tufte()+
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  labs(x = "Year",y="Count of Departments",
       fill = "Total Shootings") +
  labs(title='Count of Annual Police Shootings per Department',
       # subtitle = 'Bars count how many departments provided records and show\nhow many shootings occured each year',
       x = 'Year') +
  scale_fill_viridis_d(direction = -1,option = "A") +
  guides(fill = guide_legend(reverse=T)) +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0))
dev.off()


############################################
############################################
##
##    Figure 4.4
##
############################################
############################################

#subset data to avoid missingness across models
residual.model.data <- department.yearly.data[!is.na(department.yearly.data$n.shootings)
                                              & !is.na(department.yearly.data$actual_index_violent)
                                              & !is.na(department.yearly.data$total.officers)
                                              & !is.na(department.yearly.data$demshare),]

# residual models
residual.model.data$shoot.res.crime <- residuals(felm(n.shootings~1+demshare+total.officers+pop100k|year, 
                                                      data = residual.model.data))
residual.model.data$shoot.res.dem <- residuals(felm(n.shootings~1+actual_index_violent+total.officers+pop100k|year, 
                                                    data = residual.model.data))
residual.model.data$shoot.res.cop <- residuals(felm(n.shootings~1+actual_index_violent+demshare+pop100k|year, 
                                                    data = residual.model.data))
residual.model.data$crime.res <- residuals(felm(actual_index_violent~1+demshare+total.officers+pop100k|year, 
                                                data = residual.model.data))
residual.model.data$dem.res <- residuals(felm(demshare~1+actual_index_violent+total.officers+pop100k|year, 
                                              data = residual.model.data))
residual.model.data$cop.res <- residuals(felm(total.officers~1+demshare+actual_index_violent+pop100k|year, 
                                              data = residual.model.data))


# merge in department names for plotting later
combined.data <- residual.model.data %>%
  left_join(city.data %>% dplyr::select(ORI7,fullLoc), by=c('ORI7'='ORI7'))
colorPal <- c('darkred','darkblue', 'darkgreen','grey')


pdf('FinalFigures/fig4-4.pdf',6,4.5)
combined.data %>%
  ggplot() +
  geom_point(data=combined.data %>% filter(!(fullLoc %in% 
                                               c('LOS ANGELES, CA','CHICAGO, IL','HOUSTON, TX'))),
             aes(x=crime.res,
                 y=shoot.res.crime, 
                 colour = 'grey', alpha = .7)) +
  geom_smooth(aes(x=crime.res,y=shoot.res.crime)
              ,method = "loess", se = TRUE, color='black') +
  geom_text(data=combined.data %>% 
              filter(fullLoc == 'CHICAGO, IL'), 
            aes(x=crime.res,y=shoot.res.crime, label='C')) +
  geom_text(data=combined.data %>% 
              filter(fullLoc == 'HOUSTON, TX'), 
            aes(x=crime.res,y=shoot.res.crime, label='H')) +
  geom_text(data=combined.data %>% 
              filter(fullLoc == 'LOS ANGELES, CA'), 
            aes(x=crime.res,y=shoot.res.crime, label='LA')) +
  theme_tufte() + 
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  scale_color_manual(values=c("Los Angeles"="gold","Chicago"= "darkred", "Houston" = "darkgreen" ,"Others"="black")) +
  theme(legend.position="top") +
  guides(alpha = FALSE) +
  theme(axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12)) +
  labs(title = "Violent Crime and Annual Shootings",
       x = "Difference from average violent crime rate",
       y = "Difference from expected number of\npolice shootings")
dev.off()

############################################
############################################
##
##    Figure 4.5
##
############################################
############################################



pdf('FinalFigures/fig4-5.pdf',6,4.5)
department.yearly.data %>%
  mutate(shotCat = ifelse(n.shootings==0,'0',
                          ifelse(n.shootings==1,'1',
                                 ifelse(n.shootings==2,'2',
                                        ifelse(n.shootings==3,'3',
                                               ifelse(n.shootings==4,'4',
                                                      ifelse(n.shootings==5,'5',
                                                             'More than 5'))))))) %>%
  mutate(shotCat=factor(shotCat, levels=c('0','1','2','3',
                                          '4','5','More than 5'))) %>%
  ggplot(aes(demshare, fill = shotCat)) +
  geom_histogram(binwidth = .025,position = position_stack(reverse = TRUE)) +
  scale_fill_viridis_d(direction = -1,option = "A") +
  theme_tufte() +
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  labs(title='City Partisanship and Police Shootings',
       x='Most Recent Democratic Vote Share',
       y='Count of Departments',
       fill = 'Total Shootings') +
  guides(fill = guide_legend(reverse=T)) +
  theme(plot.caption = element_text(hjust = 0)) 

dev.off()


############################################
############################################
##
##    Figure 4.6
##
############################################
############################################
pdf('FinalFigures/fig4-6.pdf',6,4.5)
department.yearly.data %>%
  mutate(shotCat = ifelse(n.shootings==0,'0',
                          ifelse(n.shootings==1,'1',
                                 ifelse(n.shootings==2,'2',
                                        ifelse(n.shootings==3,'3',
                                               ifelse(n.shootings==4,'4',
                                                      ifelse(n.shootings==5,'5',
                                                             'More than 5'))))))) %>%
  mutate(shotCat=factor(shotCat, levels=c('0','1','2','3',
                                          '4','5','More than 5'))) %>%
  filter(!is.na(copsPerCap)) %>%
  ggplot(aes(copsPerCap, fill = shotCat)) +
  geom_histogram(binwidth = 10,position = position_stack(reverse = TRUE)) +
  scale_fill_viridis_d(direction = -1,option = "A") +
  theme_tufte() +
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  theme(axis.title.x = element_text(size = 12)) +
  guides(fill = guide_legend(reverse=T)) +
  labs(title='Number of Officers and Police Shootings',
       y='Count of Departments',
       x = 'Officers Employed Per 100,000 Residents',
       fill = 'Total Shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  geom_label(aes(x = 460, y = 50, label = "Washington, DC"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 5) +
  geom_curve(aes(x = 580, y = 45, xend = 680, yend = 2), 
             colour = "#555555", 
             size=0.75, 
             curvature = -0.2,
             arrow = arrow(length = unit(0.03, "npc"))) +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0))
dev.off()


############################################
############################################
##
##    Figure 4.7
##
############################################
############################################
pdf('FinalFigures/fig4-7.pdf',6,4.5)
combined.data %>%
  ggplot() +
  geom_point(data=combined.data %>%   filter(!(fullLoc %in% c('LOS ANGELES, CA','CHICAGO, IL','HOUSTON, TX'))),aes(x=cop.res,y=shoot.res.cop, 
                                                                                                                   colour ='grey', alpha = .7)) +
  geom_smooth(aes(x=cop.res,y=shoot.res.cop)
              ,method = "loess", se = TRUE, color='black') +
  geom_text(data=combined.data %>% filter(fullLoc == 'CHICAGO, IL'), 
            aes(x=cop.res,y=shoot.res.cop, label='C')) +
  geom_text(data=combined.data %>% filter(fullLoc == 'HOUSTON, TX'), 
            aes(x=cop.res,y=shoot.res.cop, label='H')) +
  geom_text(data=combined.data %>% filter(fullLoc == 'LOS ANGELES, CA'), 
            aes(x=cop.res,y=shoot.res.cop, label='LA')) +
  theme_tufte() + 
  theme(plot.caption = element_text(hjust = 0), 
        axis.line = element_line(color = 'black')) +
  scale_color_manual(values=c("Los Angeles"="gold","Chicago"= "darkred", "Houston" = "darkgreen" ,"Others"="black")) +
  theme(legend.position="top") +
  guides(alpha = FALSE) +
  theme(axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12)) +
  labs(title = "Number of Officers and Annual Shootings",
       x = "Difference from average number of police employed",
       y = "Difference from expected number of\npolice shootings") 
dev.off()